library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.3 ✓ purrr 0.3.4
✓ tibble 3.1.2 ✓ dplyr 1.0.6
✓ tidyr 1.1.3 ✓ stringr 1.4.0
✓ readr 1.4.0 ✓ forcats 0.5.1
── Conflicts ─────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(modelr)
set.seed(64)
Data Preperation
heights$income <- heights$income/1000
heights <- heights[-c(721,973,1611,1641,1866,2828,3122,6178,6319,6794),]
heights
Split Data into 6 Cross Validation Intervals
cv <- crossv_kfold(heights, k=6)
cv
Fit Models to each training set
models1 <- map(cv$train, ~lm(income ~ height, data = .))
models2 <- map(cv$train, ~lm(income ~ sex, data = .))
models3 <- map(cv$train, ~lm(income ~ education, data = .))
Get Predictions using the models
get_pred <- function(model, test_data) {
data <- as.data.frame(test_data)
pred <- add_predictions(data, model)
return(pred)
}
pred1 <- map2_df(models1, cv$test, get_pred, .id = "Run")
pred2 <- map2_df(models2, cv$test, get_pred, .id = "Run")
pred3 <- map2_df(models3, cv$test, get_pred, .id = "Run")
calculate the MSE for each group
MSE1 <- pred1 %>% group_by(Run) %>%
summarise(MSE = mean( (income - pred)^2))
MSE1
MSE2 <- pred2 %>% group_by(Run) %>%
summarise(MSE = mean( (income - pred)^2))
MSE2
MSE3 <- pred3 %>% group_by(Run) %>%
summarise(MSE = mean( (income - pred)^2))
MSE3
Compare the models
paste("Model 1 MSE:", mean(MSE1$MSE))
[1] "Model 1 MSE: 2978.99924246456"
paste("Model 2 MSE:", mean(MSE2$MSE))
[1] "Model 2 MSE: 2984.04148812215"
paste("Model 3 MSE:", mean(MSE3$MSE))
[1] "Model 3 MSE: 2645.51716314473"
LS0tCnRpdGxlOiAiQWN0aXZpdHkgNCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KG1vZGVscikKCnNldC5zZWVkKDY0KQpgYGAKCiMgRGF0YSBQcmVwZXJhdGlvbgoKYGBge3J9CmhlaWdodHMkaW5jb21lIDwtIGhlaWdodHMkaW5jb21lLzEwMDAKaGVpZ2h0cyA8LSBoZWlnaHRzWy1jKDcyMSw5NzMsMTYxMSwxNjQxLDE4NjYsMjgyOCwzMTIyLDYxNzgsNjMxOSw2Nzk0KSxdCmhlaWdodHMKYGBgCgojIFNwbGl0IERhdGEgaW50byA2IENyb3NzIFZhbGlkYXRpb24gSW50ZXJ2YWxzCgpgYGB7cn0KY3YgIDwtIGNyb3Nzdl9rZm9sZChoZWlnaHRzLCBrPTYpCmN2CmBgYAoKIyBGaXQgTW9kZWxzIHRvIGVhY2ggdHJhaW5pbmcgc2V0CgpgYGB7cn0KbW9kZWxzMSAgPC0gbWFwKGN2JHRyYWluLCB+bG0oaW5jb21lIH4gaGVpZ2h0LCBkYXRhID0gLikpCm1vZGVsczIgIDwtIG1hcChjdiR0cmFpbiwgfmxtKGluY29tZSB+IHNleCwgZGF0YSA9IC4pKQptb2RlbHMzICA8LSBtYXAoY3YkdHJhaW4sIH5sbShpbmNvbWUgfiBlZHVjYXRpb24sIGRhdGEgPSAuKSkKYGBgCgojIEdldCBQcmVkaWN0aW9ucyB1c2luZyB0aGUgbW9kZWxzCgpgYGB7cn0KZ2V0X3ByZWQgPC0gZnVuY3Rpb24obW9kZWwsIHRlc3RfZGF0YSkgewogIGRhdGEgPC0gYXMuZGF0YS5mcmFtZSh0ZXN0X2RhdGEpCiAgcHJlZCA8LSBhZGRfcHJlZGljdGlvbnMoZGF0YSwgbW9kZWwpCiAgcmV0dXJuKHByZWQpCn0KCnByZWQxICA8LSBtYXAyX2RmKG1vZGVsczEsIGN2JHRlc3QsIGdldF9wcmVkLCAuaWQgPSAiUnVuIikKcHJlZDIgIDwtIG1hcDJfZGYobW9kZWxzMiwgY3YkdGVzdCwgZ2V0X3ByZWQsIC5pZCA9ICJSdW4iKQpwcmVkMyAgPC0gbWFwMl9kZihtb2RlbHMzLCBjdiR0ZXN0LCBnZXRfcHJlZCwgLmlkID0gIlJ1biIpCmBgYAoKIyBjYWxjdWxhdGUgdGhlIE1TRSBmb3IgZWFjaCBncm91cAoKYGBge3J9Ck1TRTEgIDwtIHByZWQxICU+JSBncm91cF9ieShSdW4pICU+JSAKICBzdW1tYXJpc2UoTVNFID0gbWVhbiggKGluY29tZSAtIHByZWQpXjIpKQpNU0UxCmBgYAoKYGBge3J9Ck1TRTIgIDwtIHByZWQyICU+JSBncm91cF9ieShSdW4pICU+JSAKICBzdW1tYXJpc2UoTVNFID0gbWVhbiggKGluY29tZSAtIHByZWQpXjIpKQpNU0UyCmBgYAoKYGBge3J9Ck1TRTMgIDwtIHByZWQzICU+JSBncm91cF9ieShSdW4pICU+JSAKICBzdW1tYXJpc2UoTVNFID0gbWVhbiggKGluY29tZSAtIHByZWQpXjIpKQpNU0UzCmBgYAoKIyBDb21wYXJlIHRoZSBtb2RlbHMKCmBgYHtyfQpwYXN0ZSgiTW9kZWwgMSBNU0U6IiwgbWVhbihNU0UxJE1TRSkpCnBhc3RlKCJNb2RlbCAyIE1TRToiLCBtZWFuKE1TRTIkTVNFKSkKcGFzdGUoIk1vZGVsIDMgTVNFOiIsIG1lYW4oTVNFMyRNU0UpKQpgYGAKCgo=